import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense, LayerNormalization, MultiHeadAttention, Dropout, Add
from tensorflow.keras.models import Model
import matplotlib.pyplot as plt
# Load the hourly electricity dataset and drop the timestamp column,
# keeping only the numeric columns (Humidity, Temperature, Electricity).
dataset= pd.read_csv('Electricity_Consumption.csv')
df=dataset.drop(['DATE'], axis=1)
# Inspect the frame (notebook cell — its output follows below).
df.head(2)
Humidity Temperature Electricity
0 92 -1.1 914
1 92 -1.5 887
2 91 -1.5 865
3 88 -1.1 852
4 87 -1.3 852
... ... ... ...
8755 95 -1.2 1180
8756 93 0.8 1133
8757 94 1.9 1073
8758 93 2.2 994
8759 95 2.6 919

8760 rows × 3 columns

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Load dataset
dataset = pd.read_csv('Electricity_Consumption.csv')

# Exogenous features and forecasting target.
X= dataset[['Humidity','Temperature']]
y= dataset['Electricity']
# Split into training and validation sets
# NOTE(review): train_test_split shuffles rows by default; for time-series
# forecasting a chronological split (shuffle=False) is usually required —
# confirm the shuffled split is intentional.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = np.array(X_train)
X_test = np.array(X_test)
# Reshape targets to column vectors so MinMaxScaler accepts them.
y_train = np.array(y_train).reshape(-1,1)
y_test = np.array(y_test).reshape(-1,1)
from sklearn.preprocessing import MinMaxScaler  # NOTE(review): duplicate of the import above
# Scale features and target to [0, 1]; scalers are fit on training data only
# and reused to transform the test split (no test-set leakage here).
sc_X=MinMaxScaler()
X_train=sc_X.fit_transform(X_train)
X_test=sc_X.transform(X_test)
sc_y=MinMaxScaler()
y_train=sc_y.fit_transform(y_train)
y_test=sc_y.transform(y_test)

# Define sequence length (time steps)
seq_length = 50  # Number of past time steps to use for prediction


def create_sequences(data, seq_length, train_frac=0.8):
    """Slice `data` into sliding windows and split them chronologically.

    The original version appended every window to both the train and the
    test lists, so the two sets were byte-identical (total leakage). This
    version builds one set of (window, next-step) pairs and splits it by
    time: the earliest `train_frac` of windows become the training set.

    Args:
        data: 2-D array-like of shape (n_steps, n_features).
        seq_length: number of past steps per input window.
        train_frac: fraction of windows (earliest first) used for training.

    Returns:
        (X_train, X_test, y_train, y_test) numpy arrays; each X has shape
        (n, seq_length, n_features) and each y row is the time step that
        immediately follows its window.
    """
    windows, targets = [], []
    for i in range(len(data) - seq_length):
        windows.append(data[i:i + seq_length])  # past `seq_length` steps
        targets.append(data[i + seq_length])    # the next step to predict
    X, y = np.array(windows), np.array(targets)
    split = int(len(X) * train_frac)  # chronological cut — no shuffling
    return X[:split], X[split:], y[:split], y[split:]

# Generate X (input) and y (output) windows from the scaled full frame.
# NOTE(review): the original passed an undefined name `data` and printed the
# undefined `X_val`/`y_val`. The reported shape (8710, 50, 3) matches windows
# over all 3 columns of `df` (8760 - 50 = 8710), so `data` is rebuilt here
# from the scaled df values.
data = MinMaxScaler().fit_transform(df.values)  # scale all 3 columns to [0, 1]
X_train, X_test, y_train, y_test = create_sequences(data, seq_length)

print("Training Data Shape:", X_train.shape, y_train.shape)
print("Validation Data Shape:", X_test.shape, y_test.shape)
Training Data Shape: (8710, 50, 3) (8710, 3)
Validation Data Shape: (1752, 2) (1752,)
def positional_encoding(seq_length, d_model):
    """Return a sinusoidal positional-encoding table, shape (seq_length, d_model).

    Even columns carry sin, odd columns cos, with wavelengths following the
    10000^(2i/d_model) schedule from "Attention Is All You Need".
    """
    positions = np.arange(seq_length).reshape(-1, 1)  # (seq_length, 1)
    dims = np.arange(d_model).reshape(1, -1)          # (1, d_model)
    table = positions / np.power(10000, (2 * (dims // 2)) / d_model)
    table[:, 0::2] = np.sin(table[:, 0::2])  # even indices -> sin
    table[:, 1::2] = np.cos(table[:, 1::2])  # odd indices  -> cos
    return tf.constant(table, dtype=tf.float32)
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0.1):
    """One post-norm Transformer encoder block.

    Args:
        inputs: tensor whose last axis is the feature dimension.
        head_size: key/query dimension per attention head.
        num_heads: number of attention heads.
        ff_dim: hidden width of the position-wise feed-forward layer.
        dropout: dropout rate after attention and inside the FFN.

    Returns:
        Tensor with the same shape as `inputs`.
    """
    # Self-attention sub-layer with residual connection + LayerNorm.
    attn = MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)(inputs, inputs)
    attn = Dropout(dropout)(attn)
    attn = LayerNormalization(epsilon=1e-6)(Add()([attn, inputs]))

    # Feed-forward sub-layer with residual connection + LayerNorm.
    ff = Dense(ff_dim, activation="relu")(attn)
    ff = Dropout(dropout)(ff)
    ff = Dense(inputs.shape[-1])(ff)  # project back to the input feature width
    return LayerNormalization(epsilon=1e-6)(Add()([attn, ff]))
def build_transformer_model(input_shape, head_size=64, num_heads=4, ff_dim=128, num_layers=2, dropout=0.1):
    """Assemble a Transformer regressor for sequence-to-one prediction.

    Args:
        input_shape: (seq_length, n_features) of each input window.
        head_size, num_heads, ff_dim, num_layers, dropout: encoder
            hyperparameters forwarded to `transformer_encoder`.

    Returns:
        Uncompiled keras.Model mapping a (seq_length, n_features) window to
        a single scalar output.
    """
    seq_length, d_model = input_shape
    inputs = Input(shape=input_shape)

    # Inject order information, then run the stacked encoder blocks.
    x = inputs + positional_encoding(seq_length, d_model)
    for _ in range(num_layers):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    # Collapse the time axis and regress a single value.
    pooled = keras.layers.GlobalAveragePooling1D()(x)
    outputs = Dense(1)(pooled)
    return Model(inputs, outputs)

# Model Summary
input_shape = (50, 3)  # (Time Steps, Features)
forecast_horizon = 10  # NOTE(review): unused — the model below outputs one step
# The build/compile lines were commented out, so `model` was undefined when
# model.summary() ran (NameError). Build and compile the model here. The
# original commented call also passed `forecast_horizon` as the second
# positional argument, which would have landed in `head_size` — dropped.
model = build_transformer_model(input_shape)
model.compile(optimizer="adam", loss="mse", metrics=["mae"])
model.summary()
Model: "functional_6"
┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓
┃ Layer (type)         Output Shape          Param #  Connected to      ┃
┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩
│ input_layer_6       │ (None, 50, 3)     │          0 │ -                 │
│ (InputLayer)        │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ add_24 (Add)        │ (None, 50, 3)     │          0 │ input_layer_6[0]… │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ multi_head_attenti… │ (None, 50, 3)     │        603 │ add_24[0][0],     │
│ (MultiHeadAttentio… │                   │            │ add_24[0][0]      │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_37          │ (None, 50, 3)     │          0 │ multi_head_atten… │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ add_25 (Add)        │ (None, 50, 3)     │          0 │ dropout_37[0][0], │
│                     │                   │            │ add_24[0][0]      │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ layer_normalizatio… │ (None, 50, 3)     │          6 │ add_25[0][0]      │
│ (LayerNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_30 (Dense)    │ (None, 50, 128)   │        512 │ layer_normalizat… │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_38          │ (None, 50, 128)   │          0 │ dense_30[0][0]    │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_31 (Dense)    │ (None, 50, 3)     │        387 │ dropout_38[0][0]  │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ add_26 (Add)        │ (None, 50, 3)     │          0 │ layer_normalizat… │
│                     │                   │            │ dense_31[0][0]    │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ layer_normalizatio… │ (None, 50, 3)     │          6 │ add_26[0][0]      │
│ (LayerNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ multi_head_attenti… │ (None, 50, 3)     │        603 │ layer_normalizat… │
│ (MultiHeadAttentio… │                   │            │ layer_normalizat… │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_40          │ (None, 50, 3)     │          0 │ multi_head_atten… │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ add_27 (Add)        │ (None, 50, 3)     │          0 │ dropout_40[0][0], │
│                     │                   │            │ layer_normalizat… │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ layer_normalizatio… │ (None, 50, 3)     │          6 │ add_27[0][0]      │
│ (LayerNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_32 (Dense)    │ (None, 50, 128)   │        512 │ layer_normalizat… │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_41          │ (None, 50, 128)   │          0 │ dense_32[0][0]    │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_33 (Dense)    │ (None, 50, 3)     │        387 │ dropout_41[0][0]  │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ add_28 (Add)        │ (None, 50, 3)     │          0 │ layer_normalizat… │
│                     │                   │            │ dense_33[0][0]    │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ layer_normalizatio… │ (None, 50, 3)     │          6 │ add_28[0][0]      │
│ (LayerNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ global_average_poo… │ (None, 3)         │          0 │ layer_normalizat… │
│ (GlobalAveragePool… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_34 (Dense)    │ (None, 1)         │          4 │ global_average_p… │
└─────────────────────┴───────────────────┴────────────┴───────────────────┘
 Total params: 9,098 (35.54 KB)
 Trainable params: 3,032 (11.84 KB)
 Non-trainable params: 0 (0.00 B)
 Optimizer params: 6,066 (23.70 KB)
from keras_tuner import BayesianOptimization

def model_builder(hp):
    """KerasTuner hypermodel: sample encoder hyperparameters and compile."""
    search_space = dict(
        head_size=hp.Int("head_size", 32, 128, step=32),
        num_heads=hp.Int("num_heads", 2, 8, step=2),
        ff_dim=hp.Int("ff_dim", 64, 512, step=64),
        dropout=hp.Float("dropout", 0.1, 0.5, step=0.1),
    )
    candidate = build_transformer_model(input_shape=input_shape, **search_space)
    candidate.compile(optimizer="adam", loss="mse")
    return candidate


# Bayesian search over 5 trials, selecting on validation loss.
tuner = BayesianOptimization(model_builder, objective="val_loss", max_trials=5, directory="tuner")

tuner.search(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(f"Best Hyperparameters: {best_hps.values}")
Trial 5 Complete [00h 02m 56s]
val_loss: 0.05065935477614403

Best val_loss So Far: 0.05065935477614403
Total elapsed time: 00h 16m 58s
Best Hyperparameters: {'head_size': 64, 'num_heads': 6, 'ff_dim': 128, 'dropout': 0.1}
# Reload the best trial's model (weights restored from the tuner checkpoint;
# the optimizer-state warning below comes from this reload).
best_model = tuner.get_best_models(num_models=1)[0]
D:\Anaconda\Lib\site-packages\keras\src\saving\saving_lib.py:757: UserWarning: Skipping variable loading for optimizer 'adam', because it has 2 variables whereas the saved optimizer has 70 variables. 
  saveable.load_own_variables(weights_store.get(inner_path))
# Continue training the reloaded best model; the held-out split serves as validation.
history = best_model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=20, batch_size=32)
Epoch 1/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 44s 91ms/step - loss: 0.0513 - val_loss: 0.0504
Epoch 2/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 18s 65ms/step - loss: 0.0506 - val_loss: 0.0506
Epoch 3/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 17s 62ms/step - loss: 0.0515 - val_loss: 0.0505
Epoch 4/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 18s 65ms/step - loss: 0.0508 - val_loss: 0.0509
Epoch 5/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 16s 57ms/step - loss: 0.0506 - val_loss: 0.0502
Epoch 6/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 15s 56ms/step - loss: 0.0505 - val_loss: 0.0503
Epoch 7/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 15s 54ms/step - loss: 0.0504 - val_loss: 0.0508
Epoch 8/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 16s 59ms/step - loss: 0.0505 - val_loss: 0.0504
Epoch 9/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 17s 61ms/step - loss: 0.0509 - val_loss: 0.0502
Epoch 10/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 17s 62ms/step - loss: 0.0500 - val_loss: 0.0504
Epoch 11/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 17s 63ms/step - loss: 0.0503 - val_loss: 0.0498
Epoch 12/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 24s 87ms/step - loss: 0.0502 - val_loss: 0.0498
Epoch 13/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 46s 104ms/step - loss: 0.0501 - val_loss: 0.0498
Epoch 14/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 16s 57ms/step - loss: 0.0499 - val_loss: 0.0500
Epoch 15/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 17s 60ms/step - loss: 0.0502 - val_loss: 0.0501
Epoch 16/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 22s 65ms/step - loss: 0.0499 - val_loss: 0.0499
Epoch 17/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 15s 54ms/step - loss: 0.0505 - val_loss: 0.0499
Epoch 18/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 19s 69ms/step - loss: 0.0501 - val_loss: 0.0497
Epoch 19/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 23s 85ms/step - loss: 0.0503 - val_loss: 0.0498
Epoch 20/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 27s 99ms/step - loss: 0.0506 - val_loss: 0.0499
# Predict with the tuned-and-retrained network. The original called
# `model.predict`, but `model` was never built (its construction was
# commented out); `best_model` is the network that was actually trained.
preds = best_model.predict(X_test)
# Undo the target scaling to report predictions in the original units.
y_pred = sc_y.inverse_transform(preds)
print("Sample Prediction:", y_pred[:10])
273/273 ━━━━━━━━━━━━━━━━━━━━ 4s 13ms/step
Sample Prediction: [[1193.0508]
 [1180.5586]
 [1186.2701]
 [1191.1058]
 [1191.3317]
 [1189.4683]
 [1191.2549]
 [1182.8384]
 [1194.8209]
 [1197.2668]]